library(tidyverse)
library(janitor)
library(tsibble)
library(lubridate)
library(ggthemes)
library(tidyverse)
library(sf)
library(rnaturalearth)
library(jsonlite)
library(leaflet)
library(janitor)
library(sp)
wait_times <- read_csv("../Dashboard_project/raw_data/wait_times_data/monthly_ae_waitingtimes_202209.csv") %>% clean_names()
clean_wait_times <- wait_times %>%
select(-c(number_of_attendances_episode_qf, number_meeting_target_episode_qf,
discharge_destination_admission_to_same_qf, discharge_destination_other_specialty_qf,
discharge_destination_residence_qf, discharge_destination_unknown_qf,
discharge_destination_transfer_qf, attendance_greater8hrs_qf,
attendance_greater12hrs_qf)) %>%
rename("healthboard" = "hbt") %>%
mutate(country = recode(country,
"S92000003" = "Scotland"))
hospital_location <- read_csv("../Dashboard_project/raw_data/wait_times_data/hospital_flagged20211216.csv") %>%
clean_names()
location_update_df <- left_join(clean_wait_times, hospital_location,
by = c("treatment_location" = "location"))
location_update_df <- location_update_df %>%
select(-c(hscpqf, caqf, int_zone_qf, data_zone_qf, based_on_postcode,
postcode, address_line, hb, ca, int_zone, data_zone))
scotland_area <- read_csv("../Dashboard_project/raw_data/wait_times_data/scotland_area.csv") %>% clean_names()
wait_times_clean <- left_join(location_update_df, scotland_area,
by = c("hscp" = "hscp")) %>%
select(-c(hscp, hscp_date_enacted, hscp_date_archived, hb, hb_name,
hb_date_enacted, hb_date_archived, country.y)) %>%
select(month, country.x, treatment_location, healthboard, hscp_name,
location_name, everything()) %>%
rename("country" = "country.x") %>%
mutate(month = ym(month),
month = format_ISO8601(month, precision = "ym")) %>%
rename("year_month" = "month")
wait_times_clean <- wait_times_clean %>%
mutate(year = ym(year_month),
year = year(year),
month = ym(year_month),
month = month(month, label = TRUE, abbr = FALSE)) %>%
# quarter = ym(year_month),
# quarter = yearquarter(quarter)) %>%
select(year_month, year, month, country:department_type, everything()) %>%
mutate(healthboard = recode(healthboard,
"S08000015" = "NHS Ayrshire and Arran",
"S08000016" = "NHS Borders",
"S08000017" = "NHS Dumfries and Galloway",
"S08000019" = "NHS Forth Valley",
"S08000020" = "NHS Grampian",
"S08000022" = "NHS Highland",
"S08000024" = "NHS Lothian",
"S08000025" = "NHS Orkney",
"S08000026" = "NHS Shetland",
"S08000028" = "NHS Western Isles",
"S08000029" = "NHS Fife",
"S08000030" = "NHS Tayside",
"S08000031" = "NHS Greater Glasgow and Clyde",
"S08000032" = "NHS Lanarkshire"))
wait_times_clean
geometry_data <- sf::st_read(here:here("../raw_data/Shape_data/SG_NHS_HealthBoards_2019.shp")) %>%
clean_names()
Error in sf::st_read(here:here("../raw_data/Shape_data/SG_NHS_HealthBoards_2019.shp")) :
object 'here' not found
wait_times_clean <- wait_times_clean %>%
mutate(percent_target_met = number_meeting_target_aggregate/number_of_attendances_aggregate,
percent_target_met_ep = number_meeting_target_episode/number_of_attendances_episode)
wait_times_clean
# percentage of attendance target met (aggregate)
wait_times_clean %>%
group_by(year, healthboard) %>%
summarise(n = mean(percent_target_met)) %>%
ggplot(aes(year, n, colour = healthboard, group = healthboard)) +
scale_y_continuous(labels=scales::percent) +
geom_line() +
theme_classic() +
labs(x = "Year", y = "Percentage", title = "Healthboard Attendance Target Met (Percentage)")
# percentage of attendance target met (episode)
wait_times_clean %>%
group_by(year, healthboard) %>%
summarise(n = mean(percent_target_met_ep)) %>%
ggplot(aes(year, n, colour = healthboard, group = healthboard)) +
scale_y_continuous(labels=scales::percent) +
geom_line() +
theme_classic() +
labs(x = "Year", y = "Percentage", title = "Healthboard Attendance Target Met (Percentage)")
wait_times_clean
write_csv(x = wait_times_clean, file = "../clean_data/wait_times_clean.csv")
write_csv(x = dropped_joined_waiting, file = "../clean_data/map_data_waiting_times.csv")
st_write(dropped_joined_waiting, here::here("../clean_data/map_data_waiting_t.geojson"), append = TRUE)
boxplot(wait_times_clean$attendance_greater8hrs ~ wait_times_clean$month)
ts_wait_time <- wait_times_clean %>%
mutate(year_month = yearmonth(x = year_month)) %>%
as_tsibble(key = c(healthboard, hscp_name, location_name), index = year_month)
ts_wait_time <- wait_times_clean %>%
select(year_month, number_meeting_target_aggregate)
wait_times_clean %>%
select(year_month, treatment_location, healthboard, hscp_name, location_name,
number_meeting_target_aggregate) %>%
mutate(year_month = yearmonth(x = year_month)) %>%
as_tsibble(key = c(healthboard, treatment_location, hscp_name, location_name), index = year_month)
tsib_wait_time <- ts_wait_time %>%
rowid_to_column(var = "id") %>%
mutate(year_month = yearmonth(year_month)) %>%
as_tsibble(index = year_month, key = id)
wait_tsib <- tsib_wait_time %>%
index_by(year_month) %>%
summarise(avg_target_met_agg = mean(number_meeting_target_aggregate))
wait_tsib %>%
autoplot(avg_target_met_agg)
## Rolling wait times
## doesnt work
rolling_wait <- ts_wait_time %>%
group_by(year_month) %>%
summarise(avg_meeting_target = mean(number_meeting_target_aggregate))
roll_wait <- rolling_wait %>%
mutate(temp_moving_avg = slide_dbl(
.x = avg_meeting_target,
.f = ~ mean(., na.rm = TRUE),
.before = 1000,
.after = 1000
)
)
roll_wait
ggplot(roll_wait) +
geom_line(aes(x = year_month, y = avg_meeting_target), colour = "grey") +
geom_line(aes(x = year_month, y = temp_moving_avg), colour = "red")
wait_tsib <- tsib_wait_time %>%
index_by(year_month) %>%
summarise(avg_target_met_agg = mean(number_meeting_target_aggregate))
week_fit <- wait_tsib %>%
model(
# snaive = SNAIVE(avg_target_met_agg ~ lag("year_month")),
# mean_model = MEAN(avg_target_met_agg),
arima = ARIMA(avg_target_met_agg)
)
forecast_week <- week_fit %>%
fabletools::forecast(h = 120)
forecast_week
forecast_week %>%
autoplot(wait_tsib, level = NULL) +
ggtitle("10 Year Forecast of NHS Targets Met") +
xlab("Year") +
ylab("Average Target Met") +
guides(colour = guide_legend(title = "Forecast")) +
theme_classic()
## Prophet prediction
wait_prophet <- wait_tsib %>%
mutate(ds = year_month,
y = avg_target_met_agg)
wait_prophet <- column_to_rownames(wait_prophet, var = "ds")
wait_prophet <- mutate(wait_prophet, ds = year_month)
prophet <- prophet(wait_prophet)
future <- make_future_dataframe(prophet, periods = 600)
forecast_p <- predict(prophet, future)
plot(prophet, forecast_p) +
xlab("Time") +
ylab("target_met_agg") +
labs(title = "Prophet forecast of Target Met Aggregate")
library(slider)
prophet
wait_tsib
## Look at subplots tommorrow - subseries/ggseason
## need to put an id into the main dataset to turn it into a tsibble, then can
## do one of these forecasts
---
title: "R Notebook"
output: html_notebook
---

```{r}
library(tidyverse)
library(janitor)
library(tsibble)
library(lubridate)
library(ggthemes)
library(tidyverse)
library(sf)
library(rnaturalearth)
library(jsonlite)
library(leaflet)
library(janitor)
library(sp)
```

```{r}
wait_times <- read_csv("../Dashboard_project/raw_data/wait_times_data/monthly_ae_waitingtimes_202209.csv") %>% clean_names()
```

```{r}
clean_wait_times <- wait_times %>% 
  select(-c(number_of_attendances_episode_qf, number_meeting_target_episode_qf,
            discharge_destination_admission_to_same_qf, discharge_destination_other_specialty_qf,
            discharge_destination_residence_qf, discharge_destination_unknown_qf,
            discharge_destination_transfer_qf, attendance_greater8hrs_qf,
            attendance_greater12hrs_qf)) %>% 
  rename("healthboard" = "hbt") %>% 
  mutate(country = recode(country,
                          "S92000003" = "Scotland"))

```


```{r}
hospital_location <- read_csv("../Dashboard_project/raw_data/wait_times_data/hospital_flagged20211216.csv") %>% 
  clean_names()
```


```{r}
location_update_df <- left_join(clean_wait_times, hospital_location, 
                             by = c("treatment_location" = "location"))

location_update_df <- location_update_df %>% 
  select(-c(hscpqf, caqf, int_zone_qf, data_zone_qf, based_on_postcode, 
                                              postcode, address_line, hb, ca, int_zone, data_zone))
```

```{r}
scotland_area <- read_csv("../Dashboard_project/raw_data/wait_times_data/scotland_area.csv") %>% clean_names()
```

```{r}
wait_times_clean <- left_join(location_update_df, scotland_area, 
                              by = c("hscp" = "hscp")) %>% 
  select(-c(hscp, hscp_date_enacted, hscp_date_archived, hb, hb_name, 
            hb_date_enacted, hb_date_archived, country.y)) %>% 
  select(month, country.x, treatment_location, healthboard, hscp_name, 
         location_name, everything()) %>% 
  rename("country" = "country.x") %>% 
  mutate(month = ym(month),
         month = format_ISO8601(month, precision = "ym")) %>% 
  rename("year_month" = "month")
```

```{r}
wait_times_clean <- wait_times_clean %>%
  mutate(year = ym(year_month),
         year = year(year),
         month = ym(year_month),
         month = month(month, label = TRUE, abbr = FALSE)) %>% 
         # quarter = ym(year_month),
         # quarter = yearquarter(quarter)) %>% 
  select(year_month, year, month, country:department_type, everything()) %>% 
  mutate(healthboard = recode(healthboard,
                     "S08000015" = "NHS Ayrshire and Arran",
                     "S08000016" = "NHS Borders",
                     "S08000017" = "NHS Dumfries and Galloway",
                     "S08000019" = "NHS Forth Valley",
                     "S08000020" = "NHS Grampian",
                     "S08000022" = "NHS Highland",
                     "S08000024" = "NHS Lothian",
                     "S08000025" = "NHS Orkney",
                     "S08000026" = "NHS Shetland",
                     "S08000028" = "NHS Western Isles",
                     "S08000029" = "NHS Fife",
                     "S08000030" = "NHS Tayside",
                     "S08000031" = "NHS Greater Glasgow and Clyde",
                     "S08000032" = "NHS Lanarkshire"))
  
```

```{r}
wait_times_clean
```


```{r}
geometry_data <- sf::st_read(here::here("../Day 5/wait_times_data/SG_NHS_HealthBoards_2019.shp")) %>%
  clean_names()

geometry_data <- sf::st_read(here:here("../raw_data/Shape_data/SG_NHS_HealthBoards_2019.shp")) %>%
  clean_names()

simple_geo_data <- st_simplify(geometry_data, preserveTopology = FALSE, dTolerance = 1000)
simple_geo_data

ukgrid <- "+init=epsg:27700"
latlong <- "+init=epsg:4326"

lat_long_geo_data <- st_transform(simple_geo_data, crs = latlong)

lat_long_geo_data

joined_waiting <-  wait_times_clean %>%
  left_join(lat_long_geo_data, by = c("healthboard" = "hb_code"))

joined_waiting <- joined_waiting %>% 
  mutate(healthboard = recode(healthboard,
                     "S08000015" = "NHS Ayrshire and Arran",
                     "S08000016" = "NHS Borders",
                     "S08000017" = "NHS Dumfries and Galloway",
                     "S08000019" = "NHS Forth Valley",
                     "S08000020" = "NHS Grampian",
                     "S08000022" = "NHS Highland",
                     "S08000024" = "NHS Lothian",
                     "S08000025" = "NHS Orkney",
                     "S08000026" = "NHS Shetland",
                     "S08000028" = "NHS Western Isles",
                     "S08000029" = "NHS Fife",
                     "S08000030" = "NHS Tayside",
                     "S08000031" = "NHS Greater Glasgow and Clyde",
                     "S08000032" = "NHS Lanarkshire"))

joined_waiting["geometry"] %>% 
  pull()

dropped_joined_waiting <- joined_waiting %>%
  drop_na()

dropped_joined_waiting <- st_as_sf(dropped_joined_waiting)

dropped_joined_waiting

st_geometry(dropped_joined_waiting)

dropped_joined_waiting %>% 
  ggplot(aes(fill = percent_target_met_ep)) +
  geom_sf() +
  theme_map() +
  theme(legend.position = "right")

# data frame for plotting geometry points

dropped_joined_waiting <- dropped_joined_waiting %>% 
  select(year_month:attendance_greater12hrs, geometry) %>% 
  mutate(percent_target_met = number_meeting_target_aggregate/number_of_attendances_aggregate * 100,
         percent_target_met_ep = number_meeting_target_episode/number_of_attendances_episode *100)

dropped_joined_waiting

map_variables <- dropped_joined_waiting %>% select(percent_target_met, percent_target_met_ep)
  
```



```{r}
wait_times_clean <- wait_times_clean %>% 
  mutate(percent_target_met = number_meeting_target_aggregate/number_of_attendances_aggregate,
         percent_target_met_ep = number_meeting_target_episode/number_of_attendances_episode)

wait_times_clean

# percentage of attendance target met (aggregate)
  
wait_times_clean %>%   
  group_by(year, healthboard) %>% 
  summarise(n = mean(percent_target_met)) %>% 
  ggplot(aes(year, n, colour = healthboard, group = healthboard)) +
  scale_y_continuous(labels=scales::percent) +
  geom_line() +
  theme_classic() +
  labs(x = "Year", y = "Percentage", title = "Healthboard Attendance Target Met (Percentage)")

# percentage of attendance target met (episode)

wait_times_clean %>%   
  group_by(year, healthboard) %>% 
  summarise(n = mean(percent_target_met_ep)) %>% 
  ggplot(aes(year, n, colour = healthboard, group = healthboard)) +
  scale_y_continuous(labels=scales::percent) +
  geom_line() +
  theme_classic() +
  labs(x = "Year", y = "Percentage", title = "Healthboard Attendance Target Met (Percentage)")

wait_times_clean
  
```

```{r}
write_csv(x = wait_times_clean, file = "../clean_data/wait_times_clean.csv")

write_csv(x = dropped_joined_waiting, file = "../clean_data/map_data_waiting_times.csv")
```

```{r}
st_write(dropped_joined_waiting, here::here("../clean_data/map_data_waiting_t.geojson"), append = TRUE)
```




```{r}
dropped_joined_waiting
```

```{r}
boxplot(wait_times_clean$attendance_greater8hrs ~ wait_times_clean$month)
```

```{r}
ts_wait_time <- wait_times_clean %>% 
  mutate(year_month = yearmonth(x = year_month)) %>% 
  as_tsibble(key = c(healthboard, hscp_name, location_name), index = year_month)

ts_wait_time <- wait_times_clean %>%
  select(year_month, number_meeting_target_aggregate)

wait_times_clean %>% 
  select(year_month, treatment_location, healthboard, hscp_name, location_name,
         number_meeting_target_aggregate) %>% 
  mutate(year_month = yearmonth(x = year_month)) %>% 
  as_tsibble(key = c(healthboard, treatment_location, hscp_name, location_name), index = year_month)

tsib_wait_time <- ts_wait_time %>% 
  rowid_to_column(var = "id") %>% 
  mutate(year_month = yearmonth(year_month)) %>% 
  as_tsibble(index = year_month, key = id)



wait_tsib <- tsib_wait_time %>% 
  index_by(year_month) %>% 
  summarise(avg_target_met_agg = mean(number_meeting_target_aggregate))

wait_tsib %>% 
  autoplot(avg_target_met_agg) 

## Rolling wait times 

## doesnt work 

rolling_wait <- ts_wait_time %>% 
  group_by(year_month) %>% 
  summarise(avg_meeting_target = mean(number_meeting_target_aggregate))

roll_wait <- rolling_wait %>% 
  mutate(temp_moving_avg = slide_dbl(
    .x = avg_meeting_target, 
    .f = ~ mean(., na.rm = TRUE),
    .before = 1000,
    .after = 1000
  )
  )

roll_wait

ggplot(roll_wait) + 
  geom_line(aes(x = year_month, y = avg_meeting_target), colour = "grey") + 
  geom_line(aes(x = year_month, y = temp_moving_avg), colour = "red")
  
  
```

```{r}
wait_tsib <- tsib_wait_time %>% 
  index_by(year_month) %>% 
  summarise(avg_target_met_agg = mean(number_meeting_target_aggregate))

week_fit <- wait_tsib %>% 
  model(
    # snaive = SNAIVE(avg_target_met_agg ~ lag("year_month")),
    # mean_model = MEAN(avg_target_met_agg),
    arima = ARIMA(avg_target_met_agg)
  )

forecast_week <- week_fit %>% 
  fabletools::forecast(h = 120)
forecast_week

forecast_week %>%
  autoplot(wait_tsib, level = NULL) +
  ggtitle("10 Year Forecast of NHS Targets Met") +
  xlab("Year") +
  ylab("Average Target Met") +
  guides(colour = guide_legend(title = "Forecast")) +
  theme_classic()

## Prophet prediction

      wait_prophet <- wait_tsib %>% 
        mutate(ds = year_month,
               y = avg_target_met_agg)
      
      wait_prophet <- column_to_rownames(wait_prophet, var = "ds") 
      
      wait_prophet <-  mutate(wait_prophet, ds = year_month)
      
      prophet <- prophet(wait_prophet)
      
      future <- make_future_dataframe(prophet, periods = 600)
      
      forecast_p <- predict(prophet, future)
      
      plot(prophet, forecast_p) +
        xlab("Time") +
        ylab("target_met_agg") +
        labs(title = "Prophet forecast of Target Met Aggregate")
        
      
library(slider)
      

prophet

wait_tsib


## Look at subplots tommorrow - subseries/ggseason 

## need to put an id into the main dataset to turn it into a tsibble, then can
## do one of these forecasts

```

```{r}
tsib_wait_time
```

